from org.apache.lucene.index import *
from org.apache.lucene.analysis.standard import *
from org.apache.lucene.document import *
from org.pdfbox.searchengine.lucene import *
from java.io import *
import os

# Build a Lucene index over the PDF files found directly inside dataDir
# (sub-directories are not visited).  Each PDF becomes one Lucene document
# produced by PDFBox's LucenePDFDocument, plus the extra fields added below.
dataDir = r'C:\jylab\examples\files'
indexDir = r'C:\jylab\examples\index'

# java.io.File.list() returns null (None in Jython) when the path is not a
# readable directory.  Check this before the IndexWriter is opened with the
# create flag set, so a bad dataDir fails with a clear message up front.
entries = File(dataDir).list()
if entries is None:
    raise IOError("Cannot list data directory: %s" % dataDir)

# Third argument is IndexWriter's 'create' flag (1 = start a new index).
writer = IndexWriter(indexDir, StandardAnalyzer(), 1)
try:
    for entry in entries:
        # Compare case-insensitively so 'REPORT.PDF' is indexed as well.
        if entry.lower().endswith('.pdf'):
            pdfFile = File(os.path.join(dataDir, entry))
            doc = LucenePDFDocument.getDocument(pdfFile)
            # NOTE(review): this feeds the raw bytes of the PDF to the
            # analyzer as an additional "contents" field.  getDocument()
            # presumably already adds the extracted text under "contents";
            # confirm whether this extra field is wanted before removing it.
            doc.add(Field.Text("contents", FileReader(pdfFile)))
            # Keyword fields are stored and indexed verbatim, so the
            # canonical path can be read back from a search hit.
            doc.add(Field.Keyword("filename", pdfFile.getCanonicalPath()))
            writer.addDocument(doc)
    writer.optimize()
finally:
    # Always close the writer, even if a PDF fails to parse or index;
    # otherwise the writer (and whatever it holds on indexDir) is leaked.
    writer.close()

